# mTurk maker 
################################################################################
# Dependencies
################################################################################
library(data.table)
library(purrr)
library(lubridate)
library(readr)
library(dplyr)
library(readr)
library(readxl)
library(irr)
library(tidyr)
################################################################################
# Setup
################################################################################
# NOTE(review): rm(list = ls()) wipes every object in the calling environment
# when this script is source()d interactively. It is kept here only to preserve
# the script's original behaviour; consider removing it.
rm(list = ls())

# - set dir
# When run through Rscript, the script path appears in the command-line
# arguments as `--file=<path>`.
args <- commandArgs()
scriptName <- args[startsWith(args, "--file=")]

if (length(scriptName) == 0) {
  # No `--file=` argument: fall back to the file open in the RStudio editor.
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  # Drop the 7-character `--file=` prefix to obtain the path itself.
  scriptName <- substr(scriptName, 8, nchar(scriptName))
}

# Directory that contains the script. dirname() also copes with a bare file
# name (returns "."), where the previous regex-based split produced NA and
# made setwd() fail. Unlike the previous value, there is no trailing separator.
pathName <- dirname(scriptName)

# All relative paths used below ("./...", "../data/...") are resolved against
# the script's own directory.
setwd(pathName)

# Regular expression used by emojy_replace() to strip emojis from tweet text;
# read as one string from a file next to the script.
emo_vec <- read_file("./emoji_regex_vector.txt")

#' Replace every match of a pattern in a character vector.
#'
#' Thin wrapper around `stringr::str_replace_all()` with the argument order
#' used throughout this script.
#'
#' @param string Character vector to clean.
#' @param replacment Replacement text inserted for each match (the parameter
#'   name keeps its original spelling so existing calls still work).
#' @param emo_vec Pattern passed to `str_replace_all()` as `pattern`.
#' @return The result of `str_replace_all()` for `string`.
emojy_replace <- function(string, replacment, emo_vec){
  stringr::str_replace_all(
    string = string,
    pattern = emo_vec,
    replacement = replacment
  )
}
################################################################################
# Relevance
################################################################################
# Load the RA relevance annotations, keep only rows labelled by both
# annotators, and flag the rows on which the two labels coincide.
org <- read_xlsx("../data/research_assistants_data/relevance_task/annotation_data_ra_completed.xlsx") %>%
  filter(!is.na(relevant_paula), !is.na(relevant_fabio)) %>%
  mutate(relevance_agreement_check = relevant_paula == relevant_fabio)

# RA agreement: number (n) and share (f) of agreeing vs. disagreeing rows.
org %>%
  count(relevance_agreement_check) %>%
  mutate(f = n / sum(n))

# Consensus label: the shared value where the annotators agree, NA otherwise.
org <- org %>%
  mutate(relevant_ra = ifelse(relevance_agreement_check, relevant_fabio, NA))

# The RA copy is written with the original text ...
write_csv(org,"../data/research_assistants_data/relevance_task/training_data_relevance.csv")

# ... while the mTurk copy has every match of `emo_vec` removed from `text`.
org <- org %>%
  mutate(text = emojy_replace(text, "", emo_vec = emo_vec))
write_csv(org,"../data/mturk_data/training_data_relevance.csv")
################################################################################
# Problem Solution
################################################################################
# Load the RA problem/solution annotations and keep rows where both
# annotators supplied a relevance label.
org <- read_xlsx("../data/research_assistants_data/problem_solution_task/annotation_data_ra_completed.xlsx")
org <- org %>% filter(!is.na(relevant_paula) & !is.na(relevant_fabio))

# Keep only rows where the annotators agree on relevance, then flag agreement
# on the problem frame, on the solution frame, and on both at once.
org <- org %>%
  mutate(relevance_agreement_check = relevant_paula == relevant_fabio) %>%
  filter(relevance_agreement_check == TRUE) %>%
  mutate(
    problem_agreement_check = problem_frame_paula == problem_frame_fabio,
    solution_agreement_check = solution_frame_paula == solution_frame_fabio,
    problem_solution_agreement_check =
      solution_agreement_check == TRUE & problem_agreement_check == TRUE
  )

# RA agreement: number (n) and share (f) of rows per agreement value.
org %>%
  count(problem_solution_agreement_check) %>%
  mutate(f = n / sum(n))

# Consensus label, assigned only where the annotators agree on both frames:
#   all four frame flags sum to 4 -> "Both"
#   all four frame flags sum to 0 -> "Neither"
#   problem frame set             -> "Problem"
#   solution frame set            -> "Solution"
# Rows matching no rule (including every disagreement) get NA.
# The "Solution" rule now compares against 1 explicitly, like the "Problem"
# rule, instead of relying on implicit numeric-to-logical coercion.
org <- org %>%
  mutate(
    problem_solution_ra = case_when(
      problem_solution_agreement_check == TRUE &
        (problem_frame_paula + problem_frame_fabio +
           solution_frame_paula + solution_frame_fabio) == 4 ~ "Both",
      problem_solution_agreement_check == TRUE &
        (problem_frame_paula + problem_frame_fabio +
           solution_frame_paula + solution_frame_fabio) == 0 ~ "Neither",
      problem_solution_agreement_check == TRUE &
        problem_frame_fabio == 1 ~ "Problem",
      problem_solution_agreement_check == TRUE &
        solution_frame_paula == 1 ~ "Solution"
    )
  )

# Drop the intermediate per-frame flags and keep one row per tweet.
org <- org %>% select(-c(problem_agreement_check, solution_agreement_check))
org <- org %>% distinct(status_id, .keep_all = TRUE)

write_csv(org,"../data/research_assistants_data/problem_solution_task/training_data_problem_solution.csv")

# mTurk copy: remove every match of `emo_vec` from `text`, keep only tweets
# both annotators marked relevant (== 1), and drop the "Both" label.
org <- org %>% mutate(text = emojy_replace(text, "", emo_vec = emo_vec))
org <- org %>%
  filter(relevance_agreement_check == TRUE) %>%
  filter(relevant_paula == 1)

org <- org %>% filter(!problem_solution_ra %in% c("Both"))

# NOTE(review): `org_meysam` is loaded and filtered here but is not used by
# the write below (which saves `org`), and it is reassigned in the Frames
# section. Confirm whether this data set was meant to be written instead.
org_meysam <- read_csv("../data/chatGPT_data//training_data_problem_solution_final.csv",
                       col_types = cols(status_id = col_character()))

org_meysam <- org_meysam %>%
  filter(relevant_check == TRUE) %>%
  filter(relevant == TRUE) %>%
  filter(!(solution_frame == TRUE & problem_frame == TRUE))


write_csv(org,"../data/mturk_data/training_data_problem_solution_final.csv")
################################################################################
# Frames
################################################################################
# Load the RA frame annotations for relevant tweets and flag rows where the
# two annotators chose the same primary frame.
org <- read_xlsx("../data/research_assistants_data/frames_task/tweets_annotation_data_ra_completed.xlsx") %>%
  filter(relevant == TRUE) %>%
  mutate(frames_agreement_check = frame_name_primary_fabio == frame_name_primary_paula)

# RA agreement: number (n) and share (f) of rows per agreement value.
org %>%
  count(frames_agreement_check) %>%
  mutate(f = n / sum(n))

# Consensus frame (NA where the annotators differ), without the columns not
# needed for this task, one row per tweet, and with every match of `emo_vec`
# removed from `text`.
# NOTE(review): `org` is not written to disk anywhere in this section; both
# files written below come from `org_meysam`.
org <- org %>%
  mutate(frames_ra = ifelse(frames_agreement_check == TRUE, frame_name_primary_fabio, NA)) %>%
  select(-c(problem_frame, solution_frame,
            topic_name_second_fabio, topic_name_secondary_paula,
            frame_name_secondary_fabio, frame_name_secondary_paula,
            topic_name_fabio, topic_name_paula)) %>%
  distinct(status_id, .keep_all = TRUE) %>%
  mutate(text = emojy_replace(text, "", emo_vec = emo_vec))

# Frames training data read from the chatGPT_data folder; status_id is read
# as character and the `...1` column is removed.
org_meysam <- read_csv("../data/chatGPT_data//training_data_frames_final.csv",
                       col_types = cols(status_id = col_character()))
org_meysam$`...1` <- NULL

# The RA copy is written with the original text, the mTurk copy with every
# match of `emo_vec` removed.
write_csv(org_meysam,"../data/research_assistants_data/frames_task/training_data_frames.csv")
org_meysam <- org_meysam %>%
  mutate(text = emojy_replace(text, "", emo_vec = emo_vec))
write_csv(org_meysam,"../data/mturk_data/training_data_frames_final.csv")
################################################################################
# Stance
################################################################################
# Load the RA stance annotations.
org <- read_xlsx("../data/research_assistants_data/stance_task/section230_training_data_completed.xlsx")

# RA agreement: number (n) and share (f) of rows per `stance_check` value,
# ignoring rows where `stance_check` is missing.
org %>%
  filter(!is.na(stance_check)) %>%
  count(stance_check) %>%
  mutate(f = n / sum(n))

# Keep rows with `stance_check` TRUE, collapse the three indicator columns
# into one label, remove every match of `emo_vec` from `text`, and drop the
# indicator columns.
# The innermost ifelse() yields "Neutral Stance" on both branches, so any row
# that is neither pro nor negative is labelled neutral (NA indicators still
# propagate as NA).
org <- org %>%
  filter(stance_check == TRUE) %>%
  mutate(
    stance_ra = ifelse(
      `stance pro` == 1,
      "Positive Stance",
      ifelse(
        `stance negative` == 1,
        "Negative Stance",
        ifelse(`stance neutral` == 1, "Neutral Stance", "Neutral Stance")
      )
    ),
    text = emojy_replace(text, "", emo_vec = emo_vec)
  ) %>%
  select(-c(`stance pro`, `stance negative`, `stance neutral`))

write_csv(org,"../data/mturk_data/training_data_stance_final.csv")
################################################################################
# Topics
################################################################################
# Load the RA topic annotations for relevant tweets and flag rows where the
# two annotators chose the same topic.
org <- read_xlsx("../data/research_assistants_data/topic_task/tweets_annotation_data_ra_completed.xlsx") %>%
  filter(relevant == TRUE) %>%
  mutate(topic_agreement_check = topic_name_fabio == topic_name_paula)

# RA agreement: number (n) and share (f) of rows per agreement value.
org %>%
  count(topic_agreement_check) %>%
  mutate(f = n / sum(n))


# NOTE(review): the comment previously at this point read "Merge General
# Problem & Personal Problem", but no merge is performed in this section.
# The topics training data is read from the chatGPT_data folder and replaces
# `org`.
org_meysam <- read_csv("../data/chatGPT_data/training_data_topics_final.csv",
                       col_types = cols(status_id = col_character()))

# Keep relevant tweets and remove every match of `emo_vec` from `text` before
# writing the mTurk copy.
org <- org_meysam %>%
  filter(relevant == TRUE) %>%
  mutate(text = emojy_replace(text, "", emo_vec = emo_vec))

write_csv(org,"../data/mturk_data/training_data_topics_final.csv")
